import numpy as np
import pandas as pd
# Every input file is read from the data directory of the
# CSSEGISandData/COVID-19 GitHub repository (master branch).
_CSSE_DATA_ROOT = 'https://raw.githubusercontent.com/CSSEGISandData/COVID-19/master/csse_covid_19_data'
_TIME_SERIES_DIR = _CSSE_DATA_ROOT + '/csse_covid_19_time_series'

# Global time-series tables: confirmed cases, deaths and recoveries.
confirmed_df = pd.read_csv(_TIME_SERIES_DIR + '/time_series_covid19_confirmed_global.csv')
deaths_df = pd.read_csv(_TIME_SERIES_DIR + '/time_series_covid19_deaths_global.csv')
recoveries_df = pd.read_csv(_TIME_SERIES_DIR + '/time_series_covid19_recovered_global.csv')

# Daily-report files dated 11-15-2021 (general report and US report).
latest_data = pd.read_csv(_CSSE_DATA_ROOT + '/csse_covid_19_daily_reports/11-15-2021.csv')
us_medical_data = pd.read_csv(_CSSE_DATA_ROOT + '/csse_covid_19_daily_reports_us/11-15-2021.csv')
# Collapse the per-province rows into a single row per country.
# numeric_only=True drops text columns explicitly, so the resulting column
# layout does not depend on whether the installed pandas version silently
# discards non-numeric columns during sum().
# NOTE(review): the code below assumes the grouped layout is
# 'Country/Region', 'Lat', 'Long', then one column per date — confirm against
# the CSV headers.
confirmed_group_df = confirmed_df.groupby(by='Country/Region', as_index=False).sum(numeric_only=True)
deaths_group_df = deaths_df.groupby(by='Country/Region', as_index=False).sum(numeric_only=True)
recoveries_group_df = recoveries_df.groupby(by='Country/Region', as_index=False).sum(numeric_only=True)

# Active cases = confirmed - (recovered + deaths), computed for every column
# from position 3 onwards in one vectorized step.  The frame starts as a copy
# of the deaths table so it keeps the same identifying columns.
# Rows and columns are paired by POSITION across the three tables, so they
# must list the same countries and dates in the same order; a shape mismatch
# raises instead of silently producing misaligned numbers.
FIRST_VALUE_COL = 3
active_group_df = deaths_group_df.copy()
n_rows, n_cols = confirmed_group_df.shape
value_window = (slice(0, n_rows), slice(FIRST_VALUE_COL, n_cols))
active_group_df.iloc[value_window] = (
    confirmed_group_df.iloc[value_window].to_numpy()
    - (
        recoveries_group_df.iloc[value_window].to_numpy()
        + deaths_group_df.iloc[value_window].to_numpy()
    )
)
!pip install plotly
import plotly.graph_objects as go
# Summary table of per-country statistics, most confirmed cases first.

# Country names ordered by the newest cumulative confirmed count (descending).
country_order = confirmed_group_df.sort_values(
    by=confirmed_group_df.columns[-1], ascending=False)['Country/Region']

# Index every grouped table by country once, so each statistic is a single
# lookup instead of a boolean-mask scan per country.
confirmed_by_country = confirmed_group_df.set_index('Country/Region')
active_by_country = active_group_df.set_index('Country/Region')
recovered_by_country = recoveries_group_df.set_index('Country/Region')
deaths_by_country = deaths_group_df.set_index('Country/Region')

confirmed_latest = country_order.map(confirmed_by_country.iloc[:, -1])
active_latest = country_order.map(active_by_country.iloc[:, -1])
# NOTE(review): the recovered figure is read 600 columns back from the newest
# date rather than from the newest date itself; presumably a workaround for
# the recovered series no longer being updated upstream — confirm, since the
# offset drifts as the live CSV grows.
recovered_snapshot = country_order.map(recovered_by_country.iloc[:, -600])
deaths_latest = country_order.map(deaths_by_country.iloc[:, -1])
# Day-over-day change: newest cumulative count minus the previous day's
# (the same definition the "Daily increase" map layer uses).
daily_increase = confirmed_latest - country_order.map(confirmed_by_country.iloc[:, -2])
# Deaths as a percentage of confirmed cases.
mortality_rate = (deaths_latest / confirmed_latest * 100).round(decimals=3)

country_data = go.Figure()
country_data.add_trace(go.Table(
    header=dict(
        values=['Country', 'Confirmed', 'Active', 'Recovered', 'Deaths',
                'Daily Increase', 'Mortality Rate'],
        fill=dict(color='#A5B3F3'),
        line_color='darkslategray',
        # A scalar applies to all seven columns.
        align='left'),
    cells=dict(
        values=[
            country_order,
            confirmed_latest,
            active_latest,
            recovered_snapshot,
            deaths_latest,
            daily_increase,
            mortality_rate,
        ],
        fill=dict(color='#F0FCFD'),
        line_color='darkslategray',
        align='left')))
country_data.update_layout(
    title_text="Country Stats for COVID 19",
    title_x=0.5, title_font_size=20,
    paper_bgcolor="mintcream")
country_data.show()
# World choropleth with one layer per metric and a button row to switch layers.
map_fit = go.Figure()

# Per-layer settings: [source table, colorscale, initially visible].
df_dict = {
    "Confirmed": [confirmed_group_df, "blues", True],
    "Active": [active_group_df, "reds", False],
    "Recovered": [recoveries_group_df, "greens", False],
    "Deaths": [deaths_group_df, "gray_r", False],
    "Daily_inc": [None, "oranges", False],
}

# Name of the newest date column; every cumulative layer is coloured by it.
newest_date = confirmed_group_df.columns[-1]

# The four cumulative layers, added in the order the buttons expect.
for filter_name in ('Confirmed', 'Active', 'Recovered', 'Deaths'):
    layer_df, layer_scale, layer_shown = df_dict[filter_name]
    map_fit.add_trace(go.Choropleth(
        locations=layer_df['Country/Region'],
        z=layer_df[newest_date],
        locationmode='country names',
        name=filter_name,
        colorscale=layer_scale,
        showscale=False,
        colorbar_title="# of Cases World wide",
        visible=layer_shown,
        hoverinfo='all',
    ))

# Fifth layer: newest confirmed count minus the previous day's, with countries
# listed from most to fewest confirmed cases.
ranked_confirmed = confirmed_group_df.sort_values(by=newest_date, ascending=False)
day_over_day = ranked_confirmed.iloc[:, -1] - ranked_confirmed.iloc[:, -2]
_, increase_scale, increase_shown = df_dict['Daily_inc']
map_fit.add_trace(go.Choropleth(
    locations=ranked_confirmed['Country/Region'],
    z=day_over_day,
    locationmode='country names',
    name='Daily increase',
    colorscale=increase_scale,
    showscale=False,
    colorbar_title="# of new Cases World Wide COVID19",
    visible=increase_shown,
    hoverinfo='all',
))

# One button per layer; pressing it shows only the trace at the same position
# and sets the figure title to the button label.
layer_labels = ['Confirmed', 'Active', 'Recovered', 'Deaths', 'Daily Increase']
layer_buttons = [
    dict(
        label=label,
        method='update',
        args=[
            {'visible': [slot == position for slot in range(len(layer_labels))]},
            {'title': label, 'showlegend': True},
        ],
    )
    for position, label in enumerate(layer_labels)
]
map_fit.update_layout(
    updatemenus=[
        dict(
            buttons=layer_buttons,
            type="buttons",
            direction="right",
            showactive=True,
            x=-0.1,
            xanchor="left",
            y=1.1,
            yanchor="top",
        )
    ])
map_fit.update_xaxes(showticklabels=False)
map_fit.update_layout(
    title_text="# of Cases World wide", title_x=0.5, title_font_size=20,
    legend=dict(orientation='h', yanchor='top', y=1.12, xanchor='right', x=1),
    paper_bgcolor="mintcream")
map_fit.show()
# k-means here
from sklearn.cluster import KMeans
from sklearn.neighbors import NearestNeighbors
# Build one row of statistics per country (most confirmed cases first) and
# assign each country to one of three k-means clusters.

# Country names ordered by the newest cumulative confirmed count (descending).
# The Series keeps the row labels of confirmed_group_df, so `df` below carries
# those (non-sequential) labels as its index.
country_order = confirmed_group_df.sort_values(
    by=confirmed_group_df.columns[-1], ascending=False)['Country/Region']

# Index every grouped table by country once, so each statistic is a single
# lookup instead of a boolean-mask scan per country.
confirmed_by_country = confirmed_group_df.set_index('Country/Region')
active_by_country = active_group_df.set_index('Country/Region')
recovered_by_country = recoveries_group_df.set_index('Country/Region')
deaths_by_country = deaths_group_df.set_index('Country/Region')

confirmed_latest = country_order.map(confirmed_by_country.iloc[:, -1])
deaths_latest = country_order.map(deaths_by_country.iloc[:, -1])

data = {
    'Country': country_order,
    'Confirmed': confirmed_latest,
    'Active': country_order.map(active_by_country.iloc[:, -1]),
    # NOTE(review): recovered is read 600 columns back from the newest date
    # rather than from the newest date itself; presumably a workaround for the
    # recovered series no longer being updated upstream — confirm, since the
    # offset drifts as the live CSV grows.
    'Recovered': country_order.map(recovered_by_country.iloc[:, -600]),
    'Deaths': deaths_latest,
    # Day-over-day change: newest cumulative count minus the previous day's
    # (the same definition the "Daily increase" map layer uses).
    'Daily Increase': confirmed_latest - country_order.map(confirmed_by_country.iloc[:, -2]),
    # Deaths as a percentage of confirmed cases.
    'Mortality Rate': (deaths_latest / confirmed_latest * 100).round(decimals=3),
}
df = pd.DataFrame(data)

# Cluster on the six numeric statistics and store the label next to each country.
k_means = KMeans(n_clusters=3)
y = k_means.fit_predict(
    df[['Confirmed', 'Active', 'Recovered', 'Deaths', 'Daily Increase', 'Mortality Rate']])
df['Cluster'] = y
print(df.head())
import matplotlib.pyplot as plt
import seaborn as sns
from sklearn.decomposition import PCA
# Project the country statistics onto two principal components and colour
# each point by its k-means cluster.

# Theme and default figure size must be set before the figure is created,
# otherwise they only affect later plots.
sns.set(style='white', rc={'figure.figsize': (20, 10)})

# NOTE(review): 'Recovered' was one of the k-means features but is not part of
# this projection — confirm whether that omission is intended.
cluster_data = df.loc[:, ['Confirmed', 'Active', 'Deaths', 'Daily Increase', 'Mortality Rate']]
reduced_data = PCA(n_components=2).fit_transform(cluster_data)
results = pd.DataFrame(reduced_data, columns=['pca1', 'pca2'])

# `results` has a fresh 0..n-1 index while `df` keeps the row labels left over
# from sorting, so passing df['Cluster'] as a Series would be matched by label
# and colour the wrong points.  Pass the labels as a plain array so they are
# paired with the rows by position.
cluster_labels = df['Cluster'].to_numpy()
sns.scatterplot(x="pca1", y="pca2", hue=cluster_labels, data=results)
plt.title('K-means Clustering with 2 dimensions')
sns.despine(bottom=True, left=True)
plt.legend(bbox_to_anchor=(1.02, 1), loc='upper left', borderaxespad=0, title="Cluster")
plt.show()
# Express active, recovered and death counts as a percentage of each
# country's confirmed cases.
for rate_name, count_name in (
    ('Active Rate', 'Active'),
    ('Recovered Rate', 'Recovered'),
    ('Death Rate', 'Deaths'),
):
    df[rate_name] = df[count_name] / df['Confirmed'] * 100
df.head()
df.describe()
import seaborn as sns
# Kernel density estimate of each listed column, drawn one after another.
for density_column in ('Daily Increase', 'Mortality Rate', 'Active Rate', 'Death Rate'):
    sns.kdeplot(df[density_column])
%matplotlib inline
# Line chart of 'Daily Increase' for the first 15 rows of df.
plt.style.use('ggplot')
sns.set_context("notebook", font_scale=1.5, rc={"lines.linewidth": 1.5})
leading_rows = df.iloc[:15]
plt.subplots(figsize=(20, 9))  # new 20x9 figure becomes the current axes
g = sns.lineplot(data=leading_rows, x='Country', y='Daily Increase', palette='Blues_d')
g.set_title('Daily Increase per top 15 country')
%matplotlib inline
# Line chart of 'Mortality Rate' for the first 15 rows of df.
plt.style.use('ggplot')
sns.set_context("notebook", font_scale=1.5, rc={"lines.linewidth": 1.5})
leading_rows = df.iloc[:15]
plt.subplots(figsize=(20, 9))  # new 20x9 figure becomes the current axes
g = sns.lineplot(data=leading_rows, x='Country', y='Mortality Rate', palette='Blues_d')
g.set_title('Mortality Rate per top 15 country')
%matplotlib inline
# Line chart of 'Active Rate' for the first 15 rows of df.
plt.style.use('ggplot')
sns.set_context("notebook", font_scale=1.5, rc={"lines.linewidth": 1.5})
leading_rows = df.iloc[:15]
plt.subplots(figsize=(20, 9))  # new 20x9 figure becomes the current axes
g = sns.lineplot(data=leading_rows, x='Country', y='Active Rate', palette='Blues_d')
g.set_title('Active Rate per top 15 country')
%matplotlib inline
# Line chart of 'Death Rate' for the first 15 rows of df.
plt.style.use('ggplot')
sns.set_context("notebook", font_scale=1.5, rc={"lines.linewidth": 1.5})
leading_rows = df.iloc[:15]
plt.subplots(figsize=(20, 9))  # new 20x9 figure becomes the current axes
g = sns.lineplot(data=leading_rows, x='Country', y='Death Rate', palette='Blues_d')
g.set_title('Death Rate per top 15 country')
%matplotlib inline
# Line chart of 'Recovered Rate' for the first 15 rows of df.
plt.style.use('ggplot')
sns.set_context("notebook", font_scale=1.5, rc={"lines.linewidth": 1.5})
leading_rows = df.iloc[:15]
plt.subplots(figsize=(20, 9))  # new 20x9 figure becomes the current axes
g = sns.lineplot(data=leading_rows, x='Country', y='Recovered Rate', palette='Blues_d')
g.set_title('Recovered Rate per top 15 country')
# Work on an independent copy of df, then move its existing row labels into
# an 'index' column so that `result` is numbered 0..n-1 in its current order.
df_snapshot = df.copy()
df_snapshot.head()
result = df_snapshot.reset_index(drop=False)
result.head()
from sklearn.preprocessing import StandardScaler
# Select the four rate columns, show their pairwise relationships, then
# standardize them into a plain NumPy matrix for the detector.
rate_features = ['Mortality Rate', 'Active Rate', 'Recovered Rate', 'Death Rate']
data = result[rate_features]

sns.set(style="ticks")
sns.pairplot(data)

outliers_fraction = 0.05
scaler = StandardScaler()
np_scaled = scaler.fit_transform(data)

# From here on `data` is the scaled array, not the DataFrame selected above.
scaled_frame = pd.DataFrame(np_scaled)
data = scaled_frame.to_numpy()
print(data)
from tods.sk_interface.detection_algorithm.Telemanom_skinterface import TelemanomSKI
from tods.sk_interface.detection_algorithm.DeepLog_skinterface import DeepLogSKI
# Fit the DeepLog detector on the scaled matrix, then collect its labels and
# scores for the same rows.
transformer_DL = DeepLogSKI()
transformer_DL.fit(data)
prediction_labels_DL = transformer_DL.predict(data)
prediction_score_DL = transformer_DL.predict_score(data)
print("Prediction Labels\n", prediction_labels_DL)
print("Prediction Score\n", prediction_score_DL)

# Store the flattened labels next to each country.  The assignment aligns on
# the row index, so it is done first; the column is then reduced to a 0/1 flag
# that is 1 exactly where the stored label equals 1.
flat_labels = pd.Series(prediction_labels_DL.flatten())
result['anomaly_DeepLog'] = flat_labels
is_flagged = result['anomaly_DeepLog'] == 1
result['anomaly_DeepLog'] = is_flagged.astype(int)
result['anomaly_DeepLog'].value_counts()
result.head(15)
# One figure per rate column: the rate across all rows of `result`, the rows
# DeepLog flagged as anomalies, and the detector's outlier score.

# Rows flagged by DeepLog; `a` is also used by the code that follows.
a = result.loc[result['anomaly_DeepLog'] == 1]
outlier_score = pd.Series(prediction_score_DL.flatten())

# (rate column, multiplier applied to the outlier score before plotting).
# NOTE(review): the multipliers are presumably chosen so the score is visible
# at the scale of each rate — confirm.
for rate_column, score_scale in (
    ('Recovered Rate', 5),
    ('Active Rate', 3),
    ('Death Rate', 3),
):
    fig, ax = plt.subplots(figsize=(20, 16))
    ax.plot(result[rate_column], color='black', label=rate_column, linewidth=1.5)
    ax.scatter(a.index, a[rate_column], color='red', label='Anomaly', s=40)
    ax.plot(outlier_score * score_scale, color='blue', label='Outlier Score', linewidth=0.5)
    ax.legend()
    ax.set_title("Anomaly Detection Using DeepLog")
    ax.set_xlabel('Country Index')
    ax.set_ylabel(rate_column)
    plt.show()
# Inspect the flagged rows and their country names.
a
a['Country']

# 3-D scatter of the first three columns of the scaled matrix, with the rows
# flagged by the detector drawn on top in red.
fig = plt.figure(figsize=(20, 20))
ax = fig.add_subplot(111, projection='3d')

outlier_index = list(a.index)
x_vals, y_vals, z_vals = data[:, 0], data[:, 1], data[:, 2]

# Every row, in gray.
ax.scatter(x_vals, y_vals, zs=z_vals, s=15, lw=1, label="inliers", c="gray")
# Rows the detector flagged (predicted outliers, not ground truth).
ax.scatter(x_vals[outlier_index], y_vals[outlier_index], z_vals[outlier_index],
           lw=10, s=15, c="red", label="outliers")
ax.legend()
plt.show()